# encoding: utf-8
""" 
@version: v1.0 
@author: autumner 
@license: Apache Licence  
@contact: 18322313385@163.com 
@site: https://gitee.com/autumner/pythoncookbook.git 
@software: PyCharm Community Edition 
@file: getNgchinaImage.py 
@time: 18-8-7 下午1:28
"""
'''
获取国家地理中文网页面图片-程序设计->自顶向下设计
步骤1：读取保存在本地的html文件;
def getHTMLlines():
    pass
步骤2：解析并提取其中的图片链接;
def extractImageUrls():
    pass
步骤3：输出提取结果到屏幕;
def showResults():
    pass
步骤4：保存提取结果为文件;
def saveResults():
    pass
'''


def getHTMLlines(htmlpath):
    """Read a UTF-8 encoded HTML file and return its lines.

    Args:
        htmlpath: Path of the HTML file to read.

    Returns:
        A list with one string per line of the file, as produced by
        ``readlines`` (line endings are kept).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the file even when reading or decoding
    # fails part-way through.
    with open(htmlpath, 'r', encoding='utf-8') as f:
        return f.readlines()


def extractImageUrls(htmllist):
    """Collect image URLs from lines of HTML text.

    Only lines containing the text ``img`` are inspected. On such a line,
    every quoted ``src=...`` attribute value is examined, and values that
    contain ``http`` are kept in order of appearance.

    Args:
        htmllist: Iterable of HTML text lines.

    Returns:
        A list of the URL strings found; empty when nothing matches.
    """
    import re  # local import: this module has no top-level import section

    # Accepts src="..." and src='...'. There is deliberately no word
    # boundary before "src", so attributes ending in it (e.g. data-src)
    # are matched as well, like a plain split on 'src=' would.
    src_pattern = re.compile(r'''src\s*=\s*(["'])(.*?)\1''')

    urls = []
    for line in htmllist:
        if 'img' not in line:
            continue
        # findall yields (quote, value) pairs; a line without any quoted
        # src attribute simply yields nothing instead of raising.
        for _quote, url in src_pattern.findall(line):
            if 'http' in url:
                urls.append(url)
    return urls


def showRseults(urls):
    """Print each URL on its own line with a 1-based, zero-padded index.

    Args:
        urls: Iterable of URL strings to display.
    """
    template = '第{:02}个URL:{}'
    for position, link in enumerate(urls, start=1):
        print(template.format(position, link))


def saveResults(filepath, urls):
    """Write one ``wget -c <url>`` line per URL to a text file.

    The file is created or overwritten and encoded as UTF-8.

    Args:
        filepath: Path of the output file.
        urls: Iterable of URL strings.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # NOTE(review): URLs are written verbatim. If this file is ever run as
    # a shell script, characters such as '&' or ';' inside a URL would be
    # interpreted by the shell -- consider quoting if that is the use case.
    # The context manager closes the file even when a write fails.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines('wget -c ' + url + '\n' for url in urls)


def main(inputfile='ngchina.html', outputfile='ngchina_urls.txt'):
    """Run the whole pipeline: read HTML, extract image URLs, show and save.

    Args:
        inputfile: Path of the locally saved HTML page to read.
        outputfile: Path of the text file that receives the results.
    """
    htmlLines = getHTMLlines(inputfile)
    imageUrls = extractImageUrls(htmlLines)
    showRseults(imageUrls)
    saveResults(outputfile, imageUrls)


# Run the pipeline only when this file is executed as a script, so that
# importing the module does not read or write any files.
if __name__ == '__main__':
    main()
